In [1]:
import numpy as np
from numpy.random import poisson
import matplotlib.pyplot as plt
from time import time
import numpy.linalg as LA
from scipy import stats
%matplotlib inline
In [2]:
def simARPoisson(T, a, mu0):
    # simulate a log-linear Poisson AR(2): c_t ~ Poisson(mu_t), log mu_{t+1} = a0 + a1*c_t + a2*c_{t-1}
    p = 2
    c = np.zeros(T + p)
    mu = np.zeros(T + p + 1)
    mu[:p + 1] = mu0
    c[:p] = poisson(mu0, size=p)
    a0, a1, a2 = a
    for t in range(p, T + p):
        c[t] = poisson(mu[t])
        ar = a0 + a1 * c[t] + a2 * c[t - 1]
        mu[t + 1] = np.exp(ar)
    return c[p:], mu[p:]

def set_data(p, x):
    # build the lagged design matrix X_p (intercept column plus p lags) and the target vector x_T
    temp = x.flatten()
    n = len(temp[p:])
    x_T = temp[p:].reshape((n, 1))
    X_p = np.ones((n, p + 1))
    for i in range(1, p + 1):
        X_p[:, i] = temp[i - 1: i - 1 + n]
    return X_p, x_T
In [3]:
np.random.seed(19)
T = 1000
a = np.array([.2, -.1, .1])
mu0 = .5
c, mu = simARPoisson(T, a, mu0)
plt.plot(c, '.', label='Counts')
plt.plot(mu, label='Mean')
plt.legend()
plt.xlabel('time')
plt.ylabel('counts')
Out[3]:
We can see from the previous plot that the mean stays roughly constant over the whole process, apart from a noise component. Of course the mean depends on the previous counts, but as a rough check we can test whether the count data is approximately Poisson distributed with a parameter equal to the average of the means over time. The agreement between the data and this hypothesis turns out to be good. We do not carry out a full quantitative analysis (a small sketch of one follows the histogram below), but it is reassuring that the approximation is not far off.
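For reference, the process simulated by simARPoisson is
$$c_t \mid c_{t-1}, c_{t-2}, \ldots \sim \mathrm{Poisson}(\mu_t), \qquad \log \mu_{t+1} = a_0 + a_1 c_t + a_2 c_{t-1},$$
so, dropping the constant $-\log c_t!$ term, the conditional log-likelihood of the parameters is $\sum_t \left(c_t \log \mu_t - \mu_t\right)$; this is the quantity maximized over $a_1$ a few cells further down.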
In [4]:
import collections
counter = collections.Counter(c)
print(counter)
plt.bar(list(counter.keys()), list(counter.values()) / np.sum(list(counter.values())), label='data')
plt.plot(np.arange(10), stats.poisson.pmf(np.arange(10), np.mean(mu), loc=0), '*r', label='approx')
plt.xlabel('counts')
plt.ylabel('p')
plt.legend()
Out[4]:
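The comparison above is only visual. A minimal sketch of a quantitative version, assuming we pool all counts of 5 or more into a single bin and give up one degree of freedom for the estimated mean (the binning, the variable names and the use of scipy's chisquare are choices made here, not part of the original analysis), could look like the lines below; with enough data such a test will of course eventually reject, since the counts are only approximately i.i.d. Poisson.
lam = np.mean(mu)
k = 5  # pool all counts >= 5 into one bin so every expected count is reasonably large
observed = np.bincount(np.minimum(c.astype(int), k), minlength=k + 1)
expected = T * stats.poisson.pmf(np.arange(k), lam)
expected = np.append(expected, T * stats.poisson.sf(k - 1, lam))  # probability mass of counts >= k
stat, pval = stats.chisquare(observed, expected, ddof=1)  # ddof=1 for the estimated mean
print(stat, pval)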
In [5]:
a0, a1, a2 = a
x = np.arange(-.6, .4, .001)
loss = np.zeros(len(x))
for i, a1 in enumerate(x):
    # Poisson log-likelihood (up to the constant -log(c_t!) term) as a function of a1
    logmu = a0 + a1 * c[1:-1] + a2 * c[:-2]
    loss[i] = np.sum(c[2:] * logmu - np.exp(logmu))
plt.plot(x, loss)
plt.plot(x[loss == max(loss)], loss[loss == max(loss)], '*')
plt.grid()
x[loss == max(loss)]
Out[5]:
In [6]:
gamma = 0.000005
itera = 500
a1 = 0.4
a1s = np.zeros(itera)
loss = np.zeros(itera)
for i in range(itera):
    logmu = a0 + a1 * c[1:-1] + a2 * c[:-2]
    loss[i] = np.sum(c[2:] * logmu - np.exp(logmu))
    # gradient ascent on a1: dL/da1 = sum_t (c_t - mu_t) * c_{t-1}
    a1 += gamma * np.sum(c[2:] * c[1:-1] - np.exp(logmu) * c[1:-1])
    a1s[i] = a1
print(a1)
In [7]:
plt.plot(loss)
plt.xlabel('iteration')
plt.ylabel('Log Likelihood')
plt.figure()
plt.plot(a1s)
plt.xlabel('iteration')
plt.ylabel('a1')
Out[7]:
In [10]:
T = 1000
np.random.seed(1)
a0 = np.array([[0], [0]])
A1 = np.array([[.2, -.2], [0., .1]])
A2 = np.array([[.1, -.1], [0., .1]])
# the zero entries in the second rows of A1 and A2 mean x_2 depends only on its own past
x = np.zeros((2, T + 2))
for t in range(2, T + 2):
    x1 = np.dot(A1, x[:, [t - 1]])
    x2 = np.dot(A2, x[:, [t - 2]])
    # i.i.d. Gaussian noise with standard deviation 0.01 on each component
    x[:, [t]] = a0 + x1 + x2 + np.random.normal(0, 0.01, size=(2, 1))
x = x[:, 2:]
In [11]:
plt.plot(x[0, :], label='x_1')
plt.plot(x[1, :], label='x_2')
plt.legend()
plt.xlabel('time')
x.T.shape
Out[11]:
We expect causality from $x_2$ to $x_1$ but not the other way around: the off-diagonal entries of the rows that generate $x_2$ are zero, so that variable is driven only by its own past values. To test this hypothesis we can look for Granger causality. We expect a large improvement in the log-likelihood for predicting $x_1$ when we add up to two past values of the $x_2$ series as features; on the contrary, we expect no improvement for $x_2$, which should depend only on itself.
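Concretely, the comparison is a likelihood-ratio test. Writing $\ell_{\text{own}}$ for the log-likelihood of the model that uses only the target series' own two lags and $\ell_{\text{both}}$ for the log-likelihood after also adding two lags of the other series, the statistic is
$$\Lambda = 2\left(\ell_{\text{both}} - \ell_{\text{own}}\right),$$
which under the null hypothesis of no Granger causality is asymptotically $\chi^2$ distributed with $2$ degrees of freedom (one per added coefficient); chi2.sf(com, 2) in the cells below returns the corresponding p-value.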
We first build a model for $x_1$ whose features are up to two past time steps of $x_1$ alone. The estimated coefficients are close to the matrix entries they represent, which is a good sign. We then do the same after adding the $x_2$ time series and again obtain a good estimate of the parameters.
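For each model, set_data builds the design matrix $X_p$ (a constant plus the lagged values) and the target $x_T$; the coefficients are the ordinary least-squares estimate $\hat A = (X_p^{\top} X_p)^{-1} X_p^{\top} x_T$, and with residuals $E = x_T - X_p \hat A$, $n = T - p$ and $\hat\Sigma = E^{\top} E / n$, the quantity computed in the log-likelihood cells below is
$$\ell = -\frac{n}{2}\log(2\pi) - \frac{n}{2}\log\lvert\hat\Sigma\rvert - \frac{1}{2}\operatorname{tr}\!\left(E\,\hat\Sigma^{-1} E^{\top}\right).$$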
In [12]:
p = 2
# restricted model: regress x_1 on a constant and its own two lags
X_p, x_1 = set_data(p, x[0, :])
XpXp = np.dot(X_p.T, X_p)
A = np.dot(LA.inv(XpXp), np.dot(X_p.T, x_1))
A
Out[12]:
In [13]:
res = x_1 - np.dot(X_p, A)
S = np.dot(res.T, res) / (T - p)
# Gaussian log-likelihood of the restricted model (own lags only)
LL1 = -(T - p) / 2 * np.log(2 * np.pi) - (T - p) / 2 * \
      np.log(LA.det(S)) - 1 / 2 * \
      np.trace(np.dot(np.dot(res, LA.inv(S)), res.T))
LL1
Out[13]:
In [14]:
p = 2
# unrestricted model: also add two lags of x_2 to the regressors for x_1
X_foo, _ = set_data(p, x[1, :])
X_p = np.hstack((X_p, X_foo[:, 1:]))
XpXp = np.dot(X_p.T, X_p)
A = np.dot(LA.inv(XpXp), np.dot(X_p.T, x_1))
A
Out[14]:
In [15]:
res = x_1 - np.dot(X_p, A)
S = np.dot(res.T, res) / (T - p)
# Gaussian log-likelihood of the unrestricted model (own lags plus lags of x_2)
LL2 = -(T - p) / 2 * np.log(2 * np.pi) - (T - p) / 2 * \
      np.log(LA.det(S)) - 1 / 2 * \
      np.trace(np.dot(np.dot(res, LA.inv(S)), res.T))
LL2
Out[15]:
Comparing the log-likelihoods of these two models we see a large improvement, and the likelihood-ratio test below lets us say with good confidence that the more complex model is better than the simpler one.
In [16]:
from scipy.stats import chi2
# likelihood-ratio statistic; under H0 it is asymptotically chi-square with 2 degrees of freedom
com = 2 * (LL2 - LL1)
chi2.sf(com, 2, loc=0, scale=1)
Out[16]:
We repeat the same analysis, building the two models for the second time series. This time there is almost no improvement for the larger model, so we can say with good confidence that $x_1$ does not Granger-cause $x_2$.
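Since the steps are identical to the ones above, they could also be packaged in a small helper like the hypothetical granger_lr_test below (a sketch only, not used in the following cells; it relies on set_data, numpy.linalg and the chi2 import above):
def granger_lr_test(target, other, p=2):
    # restricted model: the target's own p lags only
    X_r, y = set_data(p, target)
    # unrestricted model: additionally include p lags of the other series
    X_o, _ = set_data(p, other)
    X_u = np.hstack((X_r, X_o[:, 1:]))
    def fit_loglik(X):
        A = np.dot(LA.inv(np.dot(X.T, X)), np.dot(X.T, y))
        res = y - np.dot(X, A)
        n = len(y)
        S = np.dot(res.T, res) / n
        return (-n / 2 * np.log(2 * np.pi) - n / 2 * np.log(LA.det(S))
                - 1 / 2 * np.trace(np.dot(np.dot(res, LA.inv(S)), res.T)))
    stat = 2 * (fit_loglik(X_u) - fit_loglik(X_r))
    return stat, chi2.sf(stat, p)  # p added coefficients -> p degrees of freedom
# e.g. granger_lr_test(x[1, :], x[0, :]) asks whether x_1 Granger-causes x_2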
In [17]:
p = 2
# restricted model: regress x_2 on a constant and its own two lags
X_p, x_1 = set_data(p, x[1, :])
XpXp = np.dot(X_p.T, X_p)
A = np.dot(LA.inv(XpXp), np.dot(X_p.T, x_1))
A
Out[17]:
In [18]:
res = x_1 - np.dot(X_p, A)
S = np.dot(res.T, res) / (T - p)
# Gaussian log-likelihood of the restricted model for x_2
LL1 = -(T - p) / 2 * np.log(2 * np.pi) - (T - p) / 2 * \
      np.log(LA.det(S)) - 1 / 2 * \
      np.trace(np.dot(np.dot(res, LA.inv(S)), res.T))
LL1
Out[18]:
In [19]:
p = 2
# unrestricted model: also add two lags of x_1 to the regressors for x_2
X_foo, _ = set_data(p, x[0, :])
X_p = np.hstack((X_p, X_foo[:, 1:]))
XpXp = np.dot(X_p.T, X_p)
A = np.dot(LA.inv(XpXp), np.dot(X_p.T, x_1))
A
Out[19]:
In [20]:
res = x_1 - np.dot(X_p, A)
S = np.dot(res.T, res) / (T - p)
# Gaussian log-likelihood of the unrestricted model for x_2
LL2 = -(T - p) / 2 * np.log(2 * np.pi) - (T - p) / 2 * \
      np.log(LA.det(S)) - 1 / 2 * \
      np.trace(np.dot(np.dot(res, LA.inv(S)), res.T))
LL2
Out[20]:
In [21]:
com = 2 * (LL2 - LL1)
chi2.sf(com, 2, loc=0, scale=1)
Out[21]: